


<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Deploy Quantized Models using Torch-TensorRT &mdash; Torch-TensorRT v2.5.0.dev0+60ec67b documentation</title>
  

  
  
  
  

  

  
  
    

  

  <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
  <!-- <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" /> -->
  <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery-binder.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery-dataframe.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery-rendered-html.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/collapsible-lists/css/tree_view.css" type="text/css" />
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />
    <link rel="index" title="Index" href="../../../genindex.html" />
    <link rel="search" title="Search" href="../../../search.html" />
    <link rel="next" title="TensorRT Backend for torch.compile" href="../../../dynamo/torch_compile.html" />
    <link rel="prev" title="Torch Compile Advanced Usage" href="torch_compile_advanced_usage.html" />
  <!-- Google Tag Manager -->
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
    new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
    j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
    'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
    })(window,document,'script','dataLayer','');</script>
    <!-- End Google Tag Manager -->
  

  
  <script src="../../../_static/js/modernizr.min.js"></script>

  <!-- Preload the theme fonts -->

<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">

<!-- Preload the katex fonts -->

<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.2/css/all.css" integrity="sha384-vSIIfh2YWi9wW0r9iZe7RJPrKwp6bG+s9QZMoITbCckVJqGCCRhc+ccxNcdpHuYu" crossorigin="anonymous">
</head>

<div class="container-fluid header-holder tutorials-header" id="header-holder">
  <div class="container">
    <div class="header-container">
      <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>

      <div class="main-menu">
        <ul>

          <li class="main-menu-item">
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Learn
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/get-started">
                  <span class=dropdown-title>Get Started</span>
                  <p>Run PyTorch locally or get started quickly with one of the supported cloud platforms</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials">
                  <span class="dropdown-title">Tutorials</span>
                  <p>Whats new in PyTorch tutorials</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/beginner/basics/intro.html">
                  <span class="dropdown-title">Learn the Basics</span>
                  <p>Familiarize yourself with PyTorch concepts and modules</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/recipes/recipes_index.html">
                  <span class="dropdown-title">PyTorch Recipes</span>
                  <p>Bite-size, ready-to-deploy PyTorch code examples</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/beginner/introyt.html">
                  <span class="dropdown-title">Intro to PyTorch - YouTube Series</span>
                  <p>Master PyTorch basics with our engaging YouTube tutorial series</p>
                </a>
              </div>
            </div>
          </li>

          <li>
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Ecosystem
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem">
                  <span class="dropdown-title">Tools</span>
                  <p>Learn about the tools and frameworks in the PyTorch Ecosystem</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/#community-module">
                  <span class=dropdown-title>Community</span>
                  <p>Join the PyTorch developer community to contribute, learn, and get your questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://discuss.pytorch.org/" target="_blank">
                  <span class=dropdown-title>Forums</span>
                  <p>A place to discuss PyTorch code, issues, install, research</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/resources">
                  <span class=dropdown-title>Developer Resources</span>
                  <p>Find resources and get questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem/contributor-awards-2023">
                  <span class="dropdown-title">Contributor Awards - 2023</span>
                  <p>Award winners announced at this year's PyTorch Conference</p>
                </a>
              </div>
            </div>
          </li>

          <li>
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Edge
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/edge">
                  <span class="dropdown-title">About PyTorch Edge</span>
                  <p>Build innovative and privacy-aware AI experiences for edge devices</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/executorch-overview">
                  <span class="dropdown-title">ExecuTorch</span>
                  <p>End-to-end solution for enabling on-device inference capabilities across mobile and edge devices</p>
                </a>
              </div>
            </div>  
          </li>

          <li class="main-menu-item">
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Docs
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/docs/stable/index.html">
                  <span class="dropdown-title">PyTorch</span>
                  <p>Explore the documentation for comprehensive guidance on how to use PyTorch</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/pytorch-domains">
                  <span class="dropdown-title">PyTorch Domains</span>
                  <p>Read the PyTorch Domains documentation to learn more about domain-specific libraries</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Blogs & News 
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/blog/">
                  <span class="dropdown-title">PyTorch Blog</span>
                  <p>Catch up on the latest technical news and happenings</p>
                </a>
                 <a class="nav-dropdown-item" href="https://pytorch.org/community-blog">
                  <span class="dropdown-title">Community Blog</span>
                  <p>Stories from the PyTorch ecosystem</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/videos">
                  <span class="dropdown-title">Videos</span>
                  <p>Learn about the latest PyTorch tutorials, new, and more </p>
                <a class="nav-dropdown-item" href="https://pytorch.org/community-stories">
                  <span class="dropdown-title">Community Stories</span>
                  <p>Learn how our community solves real, everyday machine learning problems with PyTorch</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/events">
                  <span class="dropdown-title">Events</span>
                  <p>Find events, webinars, and podcasts</p>
                </a>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                About
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/foundation">
                  <span class="dropdown-title">PyTorch Foundation</span>
                  <p>Learn more about the PyTorch Foundation</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/governing-board">
                  <span class="dropdown-title">Governing Board</span>
                  <p></p>
                </a>
              </div>
            </div>
          </li>

          <li class="main-menu-item">
            <div class="no-dropdown">
              <a href="https://pytorch.org/join" data-cta="join">
                Become a Member
              </a>
            </div>
          </li>
          <li>
           <div class="main-menu-item">
             <a href="https://github.com/pytorch/pytorch" class="github-icon">
             </a>
           </div>
          </li>
          <!--- TODO: This block adds the search icon to the nav bar. We will enable it later. 
          <li>
            <div class="main-menu-item">
             <a href="https://github.com/pytorch/pytorch" class="search-icon">
             </a>
            </div>
          </li>
          --->
        </ul>
      </div>

      <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu"></a>
    </div>
  </div>
</div>

<body class="pytorch-body">

   

    

    <div class="table-of-contents-link-wrapper">
      <span>Table of Contents</span>
      <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
    </div>

    <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
      <div class="pytorch-side-scroll">
        <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <div class="pytorch-left-menu-search">
            

            
              
              
                <div class="version">
                  v2.5.0.dev0+60ec67b
                </div>
              
            

            


  


<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
    <input type="text" name="q" placeholder="Search Docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

            
          </div>

          
            
            
              
            
            
              <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/quick_start.html">Quick Start</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User Guide</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/torch_tensorrt_explained.html">Torch-TensorRT Explained</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/dynamic_shapes.html">Dynamic shapes with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/saving_models.html">Saving models compiled with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/runtime.html">Deploying Torch-TensorRT Programs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/using_dla.html">DLA</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_advanced_usage.html">Torch Compile Advanced Usage</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Deploy Quantized Models using Torch-TensorRT</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../dynamo/torch_compile.html">TensorRT Backend for <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code></a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dynamo/dynamo_export.html">Compiling Exported Programs with Torch-TensorRT</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">TorchScript Frontend</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/creating_torchscript_module_in_python.html">Creating a TorchScript Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/creating_torchscript_module_in_python.html#working-with-torchscript-in-python">Working with TorchScript in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/creating_torchscript_module_in_python.html#saving-torchscript-module-to-disk">Saving TorchScript Module to Disk</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/getting_started_with_python_api.html">Using Torch-TensorRT in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/getting_started_with_cpp_api.html">Using Torch-TensorRT in  C++</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/ptq.html">Post Training Quantization (PTQ)</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">FX Frontend</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../fx/getting_started_with_fx_path.html">Torch-TensorRT (FX Frontend) User Guide</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../serving_torch_tensorrt_with_triton.html">Serving a Torch-TensorRT model with Triton</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../notebooks.html">Example notebooks</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_resnet_example.html">Compiling ResNet using the Torch-TensorRT <cite>torch.compile</cite> Backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_transformers_example.html">Compiling a Transformer using torch.compile and TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_advanced_usage.html">Torch Compile Advanced Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_stable_diffusion.html">Torch Compile Stable Diffusion</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_export_cudagraphs.html">Torch Export with Cudagraphs</a></li>
<li class="toctree-l1"><a class="reference internal" href="custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Deploy Quantized Models using Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Python API Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/torch_tensorrt.html">torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/dynamo.html">torch_tensorrt.dynamo</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/logging.html">torch_tensorrt.logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/fx.html">torch_tensorrt.fx</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/ts.html">torch_tensorrt.ts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/ptq.html">torch_tensorrt.ts.ptq</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">C++ API Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/torch_tensort_cpp.html">Torch-TensorRT C++ API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/namespace_torch_tensorrt.html">Namespace torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/namespace_torch_tensorrt__logging.html">Namespace torch_tensorrt::logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/namespace_torch_tensorrt__torchscript.html">Namespace torch_tensorrt::torchscript</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/namespace_torch_tensorrt__ptq.html">Namespace torch_tensorrt::ptq</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">CLI Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../cli/torchtrtc.html">torchtrtc</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Contributor Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/system_overview.html">System Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/dynamo_converters.html">Writing Dynamo Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/writing_dynamo_aten_lowering_passes.html">Writing Dynamo ATen Lowering Passes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/ts_converters.html">Writing TorchScript Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/useful_links.html">Useful Links for Torch-TensorRT Development</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Indices</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../indices/supported_ops.html">Operators Supported</a></li>
</ul>

            
          
        </div>
      </div>
    </nav>

    <div class="pytorch-container">
      <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
        <div class="pytorch-breadcrumbs-wrapper">
          















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="pytorch-breadcrumbs">
    
      <li>
        <a href="../../../index.html">
          
            Docs
          
        </a> &gt;
      </li>

        
      <li>Deploy Quantized Models using Torch-TensorRT</li>
    
    
      <li class="pytorch-breadcrumbs-aside">
        
            
            <a href="../../../_sources/tutorials/_rendered_examples/dynamo/vgg16_fp8_ptq.rst.txt" rel="nofollow"><img src="../../../_static/images/view-page-source-icon.svg"></a>
          
        
      </li>
    
  </ul>

  
</div>
        </div>

        <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
          Shortcuts
        </div>
      </div>

      <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
        <div class="pytorch-content-left">

        
          <!-- Google Tag Manager (noscript) -->
          <noscript><iframe src="https://www.googletagmanager.com/ns.html?id="
          height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
          <!-- End Google Tag Manager (noscript) -->
          
          <div class="rst-content">
          
            <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
             <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">
              
  <div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">Note</p>
<p><a class="reference internal" href="#sphx-glr-download-tutorials-rendered-examples-dynamo-vgg16-fp8-ptq-py"><span class="std std-ref">Go to the end</span></a>
to download the full example code</p>
</div>
<section class="sphx-glr-example-title" id="deploy-quantized-models-using-torch-tensorrt">
<span id="vgg16-fp8-ptq"></span><span id="sphx-glr-tutorials-rendered-examples-dynamo-vgg16-fp8-ptq-py"></span><h1>Deploy Quantized Models using Torch-TensorRT<a class="headerlink" href="#deploy-quantized-models-using-torch-tensorrt" title="Permalink to this heading">¶</a></h1>
<p>Here we demonstrate how to deploy a model quantized to FP8 using the Dynamo frontend of Torch-TensorRT</p>
<section id="imports-and-model-definition">
<h2>Imports and Model Definition<a class="headerlink" href="#imports-and-model-definition" title="Permalink to this heading">¶</a></h2>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">argparse</span>

<span class="kn">import</span> <span class="nn">modelopt.torch.quantization</span> <span class="k">as</span> <span class="nn">mtq</span>
<span class="kn">import</span> <span class="nn">torch</span>
<span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
<span class="kn">import</span> <span class="nn">torch.nn.functional</span> <span class="k">as</span> <span class="nn">F</span>
<span class="kn">import</span> <span class="nn">torch_tensorrt</span> <span class="k">as</span> <span class="nn">torchtrt</span>
<span class="kn">import</span> <span class="nn">torchvision.datasets</span> <span class="k">as</span> <span class="nn">datasets</span>
<span class="kn">import</span> <span class="nn">torchvision.transforms</span> <span class="k">as</span> <span class="nn">transforms</span>
<span class="kn">from</span> <span class="nn">modelopt.torch.quantization.utils</span> <span class="kn">import</span> <span class="n">export_torch_mode</span>


<span class="k">class</span> <span class="nc">VGG</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">layer_spec</span><span class="p">,</span> <span class="n">num_classes</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">init_weights</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
        <span class="nb">super</span><span class="p">(</span><span class="n">VGG</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>

        <span class="n">layers</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="n">in_channels</span> <span class="o">=</span> <span class="mi">3</span>
        <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">layer_spec</span><span class="p">:</span>
            <span class="k">if</span> <span class="n">l</span> <span class="o">==</span> <span class="s2">&quot;pool&quot;</span><span class="p">:</span>
                <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">MaxPool2d</span><span class="p">(</span><span class="n">kernel_size</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">stride</span><span class="o">=</span><span class="mi">2</span><span class="p">))</span>
            <span class="k">else</span><span class="p">:</span>
                <span class="n">layers</span> <span class="o">+=</span> <span class="p">[</span>
                    <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">l</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
                    <span class="n">nn</span><span class="o">.</span><span class="n">BatchNorm2d</span><span class="p">(</span><span class="n">l</span><span class="p">),</span>
                    <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span>
                <span class="p">]</span>
                <span class="n">in_channels</span> <span class="o">=</span> <span class="n">l</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">features</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span><span class="o">*</span><span class="n">layers</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">avgpool</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">AdaptiveAvgPool2d</span><span class="p">((</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">512</span> <span class="o">*</span> <span class="mi">1</span> <span class="o">*</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">4096</span><span class="p">),</span>
            <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span>
            <span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(),</span>
            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">4096</span><span class="p">),</span>
            <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span>
            <span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(),</span>
            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">4096</span><span class="p">,</span> <span class="n">num_classes</span><span class="p">),</span>
        <span class="p">)</span>
        <span class="k">if</span> <span class="n">init_weights</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_weights</span><span class="p">()</span>

    <span class="k">def</span> <span class="nf">_initialize_weights</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">modules</span><span class="p">():</span>
            <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">):</span>
                <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">kaiming_normal_</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s2">&quot;fan_out&quot;</span><span class="p">,</span> <span class="n">nonlinearity</span><span class="o">=</span><span class="s2">&quot;relu&quot;</span><span class="p">)</span>
                <span class="k">if</span> <span class="n">m</span><span class="o">.</span><span class="n">bias</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
                    <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
            <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">nn</span><span class="o">.</span><span class="n">BatchNorm2d</span><span class="p">):</span>
                <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
                <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
            <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">):</span>
                <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">normal_</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mf">0.01</span><span class="p">)</span>
                <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>

    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">features</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">avgpool</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
        <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">flatten</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
        <span class="k">return</span> <span class="n">x</span>


<span class="k">def</span> <span class="nf">vgg16</span><span class="p">(</span><span class="n">num_classes</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">init_weights</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
    <span class="n">vgg16_cfg</span> <span class="o">=</span> <span class="p">[</span>
        <span class="mi">64</span><span class="p">,</span>
        <span class="mi">64</span><span class="p">,</span>
        <span class="s2">&quot;pool&quot;</span><span class="p">,</span>
        <span class="mi">128</span><span class="p">,</span>
        <span class="mi">128</span><span class="p">,</span>
        <span class="s2">&quot;pool&quot;</span><span class="p">,</span>
        <span class="mi">256</span><span class="p">,</span>
        <span class="mi">256</span><span class="p">,</span>
        <span class="mi">256</span><span class="p">,</span>
        <span class="s2">&quot;pool&quot;</span><span class="p">,</span>
        <span class="mi">512</span><span class="p">,</span>
        <span class="mi">512</span><span class="p">,</span>
        <span class="mi">512</span><span class="p">,</span>
        <span class="s2">&quot;pool&quot;</span><span class="p">,</span>
        <span class="mi">512</span><span class="p">,</span>
        <span class="mi">512</span><span class="p">,</span>
        <span class="mi">512</span><span class="p">,</span>
        <span class="s2">&quot;pool&quot;</span><span class="p">,</span>
    <span class="p">]</span>
    <span class="k">return</span> <span class="n">VGG</span><span class="p">(</span><span class="n">vgg16_cfg</span><span class="p">,</span> <span class="n">num_classes</span><span class="p">,</span> <span class="n">init_weights</span><span class="p">)</span>


<span class="n">PARSER</span> <span class="o">=</span> <span class="n">argparse</span><span class="o">.</span><span class="n">ArgumentParser</span><span class="p">(</span>
    <span class="n">description</span><span class="o">=</span><span class="s2">&quot;Load pre-trained VGG model and then tune with FP8 and PTQ. For having a pre-trained VGG model, please refer to https://github.com/pytorch/TensorRT/tree/main/examples/int8/training/vgg16&quot;</span>
<span class="p">)</span>
<span class="n">PARSER</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span>
    <span class="s2">&quot;--ckpt&quot;</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">str</span><span class="p">,</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">&quot;Path to the pre-trained checkpoint&quot;</span>
<span class="p">)</span>
<span class="n">PARSER</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span>
    <span class="s2">&quot;--batch-size&quot;</span><span class="p">,</span>
    <span class="n">default</span><span class="o">=</span><span class="mi">128</span><span class="p">,</span>
    <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
    <span class="n">help</span><span class="o">=</span><span class="s2">&quot;Batch size for tuning the model with PTQ and FP8&quot;</span><span class="p">,</span>
<span class="p">)</span>

<span class="n">args</span> <span class="o">=</span> <span class="n">PARSER</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>

<span class="n">model</span> <span class="o">=</span> <span class="n">vgg16</span><span class="p">(</span><span class="n">num_classes</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">init_weights</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
</pre></div>
</div>
</section>
<section id="load-the-pre-trained-model-weights">
<h2>Load the pre-trained model weights<a class="headerlink" href="#load-the-pre-trained-model-weights" title="Permalink to this heading">¶</a></h2>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">ckpt</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">ckpt</span><span class="p">)</span>
<span class="n">weights</span> <span class="o">=</span> <span class="n">ckpt</span><span class="p">[</span><span class="s2">&quot;model_state_dict&quot;</span><span class="p">]</span>

<span class="k">if</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">device_count</span><span class="p">()</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
    <span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">OrderedDict</span>

    <span class="n">new_state_dict</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span>
    <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">weights</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
        <span class="n">name</span> <span class="o">=</span> <span class="n">k</span><span class="p">[</span><span class="mi">7</span><span class="p">:]</span>  <span class="c1"># remove `module.`</span>
        <span class="n">new_state_dict</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span>
    <span class="n">weights</span> <span class="o">=</span> <span class="n">new_state_dict</span>

<span class="n">model</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">weights</span><span class="p">)</span>
<span class="c1"># Don&#39;t forget to set the model to evaluation mode!</span>
<span class="n">model</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
</pre></div>
</div>
</section>
<section id="load-training-dataset-and-define-loss-function-for-ptq">
<h2>Load training dataset and define loss function for PTQ<a class="headerlink" href="#load-training-dataset-and-define-loss-function-for-ptq" title="Permalink to this heading">¶</a></h2>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">training_dataset</span> <span class="o">=</span> <span class="n">datasets</span><span class="o">.</span><span class="n">CIFAR10</span><span class="p">(</span>
    <span class="n">root</span><span class="o">=</span><span class="s2">&quot;./data&quot;</span><span class="p">,</span>
    <span class="n">train</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
    <span class="n">download</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
    <span class="n">transform</span><span class="o">=</span><span class="n">transforms</span><span class="o">.</span><span class="n">Compose</span><span class="p">(</span>
        <span class="p">[</span>
            <span class="n">transforms</span><span class="o">.</span><span class="n">RandomCrop</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">4</span><span class="p">),</span>
            <span class="n">transforms</span><span class="o">.</span><span class="n">RandomHorizontalFlip</span><span class="p">(),</span>
            <span class="n">transforms</span><span class="o">.</span><span class="n">ToTensor</span><span class="p">(),</span>
            <span class="n">transforms</span><span class="o">.</span><span class="n">Normalize</span><span class="p">((</span><span class="mf">0.4914</span><span class="p">,</span> <span class="mf">0.4822</span><span class="p">,</span> <span class="mf">0.4465</span><span class="p">),</span> <span class="p">(</span><span class="mf">0.2023</span><span class="p">,</span> <span class="mf">0.1994</span><span class="p">,</span> <span class="mf">0.2010</span><span class="p">)),</span>
        <span class="p">]</span>
    <span class="p">),</span>
<span class="p">)</span>
<span class="n">training_dataloader</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span>
    <span class="n">training_dataset</span><span class="p">,</span>
    <span class="n">batch_size</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">batch_size</span><span class="p">,</span>
    <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
    <span class="n">num_workers</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span>
    <span class="n">drop_last</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="p">)</span>

<span class="n">data</span> <span class="o">=</span> <span class="nb">iter</span><span class="p">(</span><span class="n">training_dataloader</span><span class="p">)</span>
<span class="n">images</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>

<span class="n">crit</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
</pre></div>
</div>
</section>
<section id="define-calibration-loop-for-quantization">
<h2>Define Calibration Loop for quantization<a class="headerlink" href="#define-calibration-loop-for-quantization" title="Permalink to this heading">¶</a></h2>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">calibrate_loop</span><span class="p">(</span><span class="n">model</span><span class="p">):</span>
    <span class="c1"># calibrate over the training dataset</span>
    <span class="n">total</span> <span class="o">=</span> <span class="mi">0</span>
    <span class="n">correct</span> <span class="o">=</span> <span class="mi">0</span>
    <span class="n">loss</span> <span class="o">=</span> <span class="mf">0.0</span>
    <span class="k">for</span> <span class="n">data</span><span class="p">,</span> <span class="n">labels</span> <span class="ow">in</span> <span class="n">training_dataloader</span><span class="p">:</span>
        <span class="n">data</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">labels</span><span class="o">.</span><span class="n">cuda</span><span class="p">(</span><span class="n">non_blocking</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
        <span class="n">out</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
        <span class="n">loss</span> <span class="o">+=</span> <span class="n">crit</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span>
        <span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
        <span class="n">total</span> <span class="o">+=</span> <span class="n">labels</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
        <span class="n">correct</span> <span class="o">+=</span> <span class="p">(</span><span class="n">preds</span> <span class="o">==</span> <span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>

    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;PTQ Loss: </span><span class="si">{:.5f}</span><span class="s2"> Acc: </span><span class="si">{:.2f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">loss</span> <span class="o">/</span> <span class="n">total</span><span class="p">,</span> <span class="mi">100</span> <span class="o">*</span> <span class="n">correct</span> <span class="o">/</span> <span class="n">total</span><span class="p">))</span>
</pre></div>
</div>
</section>
<section id="tune-the-pre-trained-model-with-fp8-and-ptq">
<h2>Tune the pre-trained model with FP8 and PTQ<a class="headerlink" href="#tune-the-pre-trained-model-with-fp8-and-ptq" title="Permalink to this heading">¶</a></h2>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">quant_cfg</span> <span class="o">=</span> <span class="n">mtq</span><span class="o">.</span><span class="n">FP8_DEFAULT_CFG</span>
<span class="c1"># PTQ with in-place replacement to quantized modules</span>
<span class="n">mtq</span><span class="o">.</span><span class="n">quantize</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">quant_cfg</span><span class="p">,</span> <span class="n">forward_loop</span><span class="o">=</span><span class="n">calibrate_loop</span><span class="p">)</span>
<span class="c1"># model has FP8 qdq nodes at this point</span>
</pre></div>
</div>
</section>
<section id="inference">
<h2>Inference<a class="headerlink" href="#inference" title="Permalink to this heading">¶</a></h2>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Load the testing dataset</span>
<span class="n">testing_dataset</span> <span class="o">=</span> <span class="n">datasets</span><span class="o">.</span><span class="n">CIFAR10</span><span class="p">(</span>
    <span class="n">root</span><span class="o">=</span><span class="s2">&quot;./data&quot;</span><span class="p">,</span>
    <span class="n">train</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
    <span class="n">download</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
    <span class="n">transform</span><span class="o">=</span><span class="n">transforms</span><span class="o">.</span><span class="n">Compose</span><span class="p">(</span>
        <span class="p">[</span>
            <span class="n">transforms</span><span class="o">.</span><span class="n">ToTensor</span><span class="p">(),</span>
            <span class="n">transforms</span><span class="o">.</span><span class="n">Normalize</span><span class="p">((</span><span class="mf">0.4914</span><span class="p">,</span> <span class="mf">0.4822</span><span class="p">,</span> <span class="mf">0.4465</span><span class="p">),</span> <span class="p">(</span><span class="mf">0.2023</span><span class="p">,</span> <span class="mf">0.1994</span><span class="p">,</span> <span class="mf">0.2010</span><span class="p">)),</span>
        <span class="p">]</span>
    <span class="p">),</span>
<span class="p">)</span>

<span class="n">testing_dataloader</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span>
    <span class="n">testing_dataset</span><span class="p">,</span>
    <span class="n">batch_size</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">batch_size</span><span class="p">,</span>
    <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
    <span class="n">num_workers</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span>
    <span class="n">drop_last</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="p">)</span>  <span class="c1"># set drop_last=True to drop the last incomplete batch for static shape `torchtrt.dynamo.compile()`</span>

<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
    <span class="k">with</span> <span class="n">export_torch_mode</span><span class="p">():</span>
        <span class="c1"># Compile the model with Torch-TensorRT Dynamo backend</span>
        <span class="n">input_tensor</span> <span class="o">=</span> <span class="n">images</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
        <span class="n">exp_program</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">export</span><span class="o">.</span><span class="n">export</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="p">(</span><span class="n">input_tensor</span><span class="p">,))</span>
        <span class="n">trt_model</span> <span class="o">=</span> <span class="n">torchtrt</span><span class="o">.</span><span class="n">dynamo</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span>
            <span class="n">exp_program</span><span class="p">,</span>
            <span class="n">inputs</span><span class="o">=</span><span class="p">[</span><span class="n">input_tensor</span><span class="p">],</span>
            <span class="n">enabled_precisions</span><span class="o">=</span><span class="p">{</span><span class="n">torch</span><span class="o">.</span><span class="n">float8_e4m3fn</span><span class="p">},</span>
            <span class="n">min_block_size</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
            <span class="n">debug</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="c1"># You can also use torch compile path to compile the model with Torch-TensorRT:</span>
        <span class="c1"># trt_model = torch.compile(model, backend=&quot;tensorrt&quot;)</span>

        <span class="c1"># Inference compiled Torch-TensorRT model over the testing dataset</span>
        <span class="n">total</span> <span class="o">=</span> <span class="mi">0</span>
        <span class="n">correct</span> <span class="o">=</span> <span class="mi">0</span>
        <span class="n">loss</span> <span class="o">=</span> <span class="mf">0.0</span>
        <span class="n">class_probs</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="n">class_preds</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="k">for</span> <span class="n">data</span><span class="p">,</span> <span class="n">labels</span> <span class="ow">in</span> <span class="n">testing_dataloader</span><span class="p">:</span>
            <span class="n">data</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">labels</span><span class="o">.</span><span class="n">cuda</span><span class="p">(</span><span class="n">non_blocking</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
            <span class="n">out</span> <span class="o">=</span> <span class="n">trt_model</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
            <span class="n">loss</span> <span class="o">+=</span> <span class="n">crit</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span>
            <span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
            <span class="n">class_probs</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">F</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">out</span><span class="p">])</span>
            <span class="n">class_preds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">preds</span><span class="p">)</span>
            <span class="n">total</span> <span class="o">+=</span> <span class="n">labels</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
            <span class="n">correct</span> <span class="o">+=</span> <span class="p">(</span><span class="n">preds</span> <span class="o">==</span> <span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>

        <span class="n">test_probs</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">([</span><span class="n">torch</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">class_probs</span><span class="p">])</span>
        <span class="n">test_preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">(</span><span class="n">class_preds</span><span class="p">)</span>
        <span class="n">test_loss</span> <span class="o">=</span> <span class="n">loss</span> <span class="o">/</span> <span class="n">total</span>
        <span class="n">test_acc</span> <span class="o">=</span> <span class="n">correct</span> <span class="o">/</span> <span class="n">total</span>
        <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Test Loss: </span><span class="si">{:.5f}</span><span class="s2"> Test Acc: </span><span class="si">{:.2f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">test_loss</span><span class="p">,</span> <span class="mi">100</span> <span class="o">*</span> <span class="n">test_acc</span><span class="p">))</span>
</pre></div>
</div>
<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.000 seconds)</p>
<div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-rendered-examples-dynamo-vgg16-fp8-ptq-py">
<div class="sphx-glr-download sphx-glr-download-python docutils container">
<p><a class="reference download internal" download="" href="../../../_downloads/6dc7949e3e7f3cb855e4a7b28eadc851/vgg16_fp8_ptq.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">vgg16_fp8_ptq.py</span></code></a></p>
</div>
<div class="sphx-glr-download sphx-glr-download-jupyter docutils container">
<p><a class="reference download internal" download="" href="../../../_downloads/8d74c66ac043702cc076cfc2479adc0f/vgg16_fp8_ptq.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">vgg16_fp8_ptq.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</section>
</section>


             </article>
             
            </div>
            <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="../../../dynamo/torch_compile.html" class="btn btn-neutral float-right" title="TensorRT Backend for torch.compile" accesskey="n" rel="next">Next <img src="../../../_static/images/chevron-right-orange.svg" class="next-page"></a>
      
      
        <a href="torch_compile_advanced_usage.html" class="btn btn-neutral" title="Torch Compile Advanced Usage" accesskey="p" rel="prev"><img src="../../../_static/images/chevron-right-orange.svg" class="previous-page"> Previous</a>
      
    </div>
  

  

    <hr>

  

  <div role="contentinfo">
    <p>
        &copy; Copyright 2024, NVIDIA Corporation.

    </p>
  </div>
    
      <div>
        Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
      </div>
     

</footer>

          </div>
        </div>

        <div class="pytorch-content-right" id="pytorch-content-right">
          <div class="pytorch-right-menu" id="pytorch-right-menu">
            <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
              <ul>
<li><a class="reference internal" href="#">Deploy Quantized Models using Torch-TensorRT</a><ul>
<li><a class="reference internal" href="#imports-and-model-definition">Imports and Model Definition</a></li>
<li><a class="reference internal" href="#load-the-pre-trained-model-weights">Load the pre-trained model weights</a></li>
<li><a class="reference internal" href="#load-training-dataset-and-define-loss-function-for-ptq">Load training dataset and define loss function for PTQ</a></li>
<li><a class="reference internal" href="#define-calibration-loop-for-quantization">Define Calibration Loop for quantization</a></li>
<li><a class="reference internal" href="#tune-the-pre-trained-model-with-fp8-and-ptq">Tune the pre-trained model with FP8 and PTQ</a></li>
<li><a class="reference internal" href="#inference">Inference</a></li>
</ul>
</li>
</ul>

            </div>
          </div>
        </div>
      </section>
    </div>

  


  

     
       <script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
         <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
         <script src="../../../_static/jquery.js"></script>
         <script src="../../../_static/underscore.js"></script>
         <script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
         <script src="../../../_static/doctools.js"></script>
         <script src="../../../_static/collapsible-lists/js/CollapsibleLists.compressed.js"></script>
         <script src="../../../_static/collapsible-lists/js/apply-collapsible-lists.js"></script>
         <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
     

  

  <script type="text/javascript" src="../../../_static/js/vendor/popper.min.js"></script>
  <script type="text/javascript" src="../../../_static/js/vendor/bootstrap.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
  <script type="text/javascript" src="../../../_static/js/theme.js"></script>

  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script> 

  <!-- Begin Footer -->

  <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
    <div class="container">
      <div class="row">
        <div class="col-md-4 text-center">
          <h2>Docs</h2>
          <p>Access comprehensive developer documentation for PyTorch</p>
          <a class="with-right-arrow" href="https://pytorch.org/docs/stable/index.html">View Docs</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Tutorials</h2>
          <p>Get in-depth tutorials for beginners and advanced developers</p>
          <a class="with-right-arrow" href="https://pytorch.org/tutorials">View Tutorials</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Resources</h2>
          <p>Find development resources and get your questions answered</p>
          <a class="with-right-arrow" href="https://pytorch.org/resources">View Resources</a>
        </div>
      </div>
    </div>
  </div>

  <footer class="site-footer">
    <div class="container footer-container">
      <div class="footer-logo-wrapper">
        <a href="https://pytorch.org/" class="footer-logo"></a>
      </div>

      <div class="footer-links-wrapper">
        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/">PyTorch</a></li>
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/features">Features</a></li>
            <li><a href="https://pytorch.org/ecosystem">Ecosystem</a></li>
            <li><a href="https://pytorch.org/blog/">Blog</a></li>
            <li><a href="https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md">Contributing</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/resources">Resources</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/docs/stable/index.html">Docs</a></li>
            <li><a href="https://discuss.pytorch.org" target="_blank">Discuss</a></li>
            <li><a href="https://github.com/pytorch/pytorch/issues" target="_blank">Github Issues</a></li>
            <li><a href="https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf" target="_blank">Brand Guidelines</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">Stay up to date</li>
            <li><a href="https://www.facebook.com/pytorch" target="_blank">Facebook</a></li>
            <li><a href="https://twitter.com/pytorch" target="_blank">Twitter</a></li>
            <li><a href="https://www.youtube.com/pytorch" target="_blank">YouTube</a></li>
            <li><a href="https://www.linkedin.com/company/pytorch" target="_blank">LinkedIn</a></li>
          </ul>  
          </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">PyTorch Podcasts</li>
            <li><a href="https://open.spotify.com/show/6UzHKeiy368jKfQMKKvJY5" target="_blank">Spotify</a></li>
            <li><a href="https://podcasts.apple.com/us/podcast/pytorch-developer-podcast/id1566080008" target="_blank">Apple</a></li>
            <li><a href="https://www.google.com/podcasts?feed=aHR0cHM6Ly9mZWVkcy5zaW1wbGVjYXN0LmNvbS9PQjVGa0lsOA%3D%3D" target="_blank">Google</a></li>
            <li><a href="https://music.amazon.com/podcasts/7a4e6f0e-26c2-49e9-a478-41bd244197d0/PyTorch-Developer-Podcast?" target="_blank">Amazon</a></li>
          </ul>
         </div>
        </div>
        
        <div class="privacy-policy">
          <ul>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/terms/" target="_blank">Terms</a></li>
            <li class="privacy-policy-links">|</li>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/privacy-policy/" target="_blank">Privacy</a></li>
          </ul>
        </div>
        <div class="copyright">
        <p>© Copyright The Linux Foundation. The PyTorch Foundation is a project of The Linux Foundation.
          For web site terms of use, trademark policy and other policies applicable to The PyTorch Foundation please see
          <a href="https://www.linuxfoundation.org/policies/">www.linuxfoundation.org/policies/</a>. The PyTorch Foundation supports the PyTorch open source
          project, which has been established as PyTorch Project a Series of LF Projects, LLC. For policies applicable to the PyTorch Project a Series of LF Projects, LLC,
          please see <a href="https://www.lfprojects.org/policies/">www.lfprojects.org/policies/</a>.</p>
      </div>
     </div>

  </footer>

  <div class="cookie-banner-wrapper">
  <div class="container">
    <p class="gdpr-notice">To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: <a href="https://www.facebook.com/policies/cookies/">Cookies Policy</a>.</p>
    <img class="close-button" src="../../../_static/images/pytorch-x.svg">
  </div>
</div>

  <!-- End Footer -->

  <!-- Begin Mobile Menu -->

  <div class="mobile-main-menu">
    <div class="container-fluid">
      <div class="container">
        <div class="mobile-main-menu-header-container">
          <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
          <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
        </div>
      </div>
    </div>

    <div class="mobile-main-menu-links-container">
      <div class="main-menu">
        <ul>
           <li class="resources-mobile-menu-title">
             <a>Learn</a>
           </li>
           <ul class="resources-mobile-menu-items">
             <li>
               <a href="https://pytorch.org/get-started">Get Started</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials">Tutorials</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials/beginner/basics/intro.html">Learn the Basics</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials/recipes/recipes_index.html">PyTorch Recipes</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials/beginner/introyt.html">Introduction to PyTorch - YouTube Series</a>
             </li>
           </ul>
           <li class="resources-mobile-menu-title">
             <a>Ecosystem</a>
           </li>
           <ul class="resources-mobile-menu-items">
             <li>
               <a href="https://pytorch.org/ecosystem">Tools</a>
             </li>
             <li>
               <a href="https://pytorch.org/#community-module">Community</a>
             </li>
             <li>
               <a href="https://discuss.pytorch.org/">Forums</a>
             </li>
             <li>
               <a href="https://pytorch.org/resources">Developer Resources</a>
             </li>
             <li>
               <a href="https://pytorch.org/ecosystem/contributor-awards-2023">Contributor Awards - 2023</a>
             </li>
           </ul>

           <li class="resources-mobile-menu-title">
             <a>Edge</a>
           </li>

           <ul class="resources-mobile-menu-items">
             <li>
               <a href="https://pytorch.org/edge">About PyTorch Edge</a>
             </li>
             
             <li>
               <a href="https://pytorch.org/executorch-overview">ExecuTorch</a>
             </li>
           </ul>

           <li class="resources-mobile-menu-title">
             <a>Docs</a>
           </li>

           <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/docs/stable/index.html">PyTorch</a>
            </li>

            <li>
              <a href="https://pytorch.org/pytorch-domains">PyTorch Domains</a>
            </li>
          </ul>

          <li class="resources-mobile-menu-title">
            <a>Blog & News</a>
          </li>
            
           <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/blog/">PyTorch Blog</a>
            </li>
            <li>
              <a href="https://pytorch.org/community-blog">Community Blog</a>
            </li>

            <li>
              <a href="https://pytorch.org/videos">Videos</a>
            </li>

            <li>
              <a href="https://pytorch.org/community-stories">Community Stories</a>
            </li>
            <li>
              <a href="https://pytorch.org/events">Events</a>
            </li>
          </ul>
          
          <li class="resources-mobile-menu-title">
            <a>About</a>
          </li>

          <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/foundation">PyTorch Foundation</a>
            </li>
            <li>
              <a href="https://pytorch.org/governing-board">Governing Board</a>
            </li>
          </ul>
        </ul>
      </div>
    </div>
  </div>

  <!-- End Mobile Menu -->

  <script type="text/javascript" src="../../../_static/js/vendor/anchor.min.js"></script>

  <script type="text/javascript">
    $(document).ready(function() {
      mobileMenu.bind();
      mobileTOC.bind();
      pytorchAnchors.bind();
      sideMenus.bind();
      scrollToAnchor.bind();
      highlightNavigation.bind();
      mainMenuDropdown.bind();
      filterTags.bind();

      // Add class to links that have code blocks, since we cannot create links in code blocks
      $("article.pytorch-article a span.pre").each(function(e) {
        $(this).closest("a").addClass("has-code");
      });
    })
  </script>
</body>
</html>